
// Per-pixel random data, read with Load at (pixel & 63) by HORIZON_BASED_AO_PS.
// xy is used as a 2D rotation (cos, sin) applied to each direction, z as the ray-start jitter.
Texture2D<float3> g_RandDir;
// Depth map read by fetch_eye_pos_back.
Texture2D<float>  g_BackDepthMap;
// Normal and depth packed into one map: xyz is read as the surface normal,
// w is read as the eye-space z used for position reconstruction.
Texture2D<float4> g_NormalDepthMap;
// One 2D direction per texel, loaded by index for each of the g_NumDir directions.
Texture1D<float2> g_Dirs;

// Single-precision pi; used by biased_tangent to clamp the biased angle to pi/2.
#define M_PI 3.14159265f

// Point-filtered, clamp-addressed sampler used for all depth/normal fetches,
// so each fetch returns a single unfiltered texel.
SamplerState samNearest
{
    Filter   = MIN_MAG_MIP_POINT;
    AddressU = Clamp;
    AddressV = Clamp;
};

//----------------------------------------------------------------------------------

// Shader constants for the HBAO pass. Member order defines the buffer layout; do not reorder.
cbuffer cb1
{
	// true selects the perspective code paths, false the orthographic ones
	bool   g_bPers;
    // Upper bound on the number of marching steps per direction
    float  g_NumSteps;
    // Number of directions marched per pixel (also the divisor of the accumulated AO)
    float  g_NumDir;
    // Radius of influence in eye space
    float  g_R;
    // Multiplied with the sample distance to normalize it; presumably 1 / g_R (set by the application)
    float  g_inv_R;
    // Compared against the squared sample distance; presumably g_R * g_R (set by the application)
    float  g_sqr_R;
    // Angle bias added to the tangent angle in biased_tangent
    float  g_AngleBias;
    // Bias added directly to tangent values; presumably tan(g_AngleBias) -- confirm against the application
    float  g_TanAngleBias;
    // Coefficient of the quadratic distance falloff (see falloff)
    float  g_Attenuation;
    // Scale applied to the averaged occlusion before it is subtracted from 1
    float  g_Contrast;
    // NOTE(review): the two aspect-ratio values are not referenced by the code in this file
    float  g_AspectRatio;
    float  g_InvAspectRatio;
    //Those 2 parameters are used for perspective mode. They are useless in ortho mode.
    float2 g_FocalLen;
    float2 g_InvFocalLen;
    
    // Render-target size in pixels and its reciprocal (used to snap UVs to texels)
    float2 g_InvResolution;
    float2 g_Resolution;
    // NOTE(review): not referenced by the code in this file
    float2 g_ZLinParams;
    
     //Those 2 parameters are used for ortho mode. They are useless in perspective mode.
     //((right-left)/2,(top-bottom)/2)
    float2 g_ViewFieldLen;
    float2 g_InvViewFieldLen;
}

//----------------------------------------------------------------------------------

// Vertex input of the full-screen quad.
struct VS_INPUT
{
    // Written unchanged to SV_Position by FullScreenQuadVS
    float4 Pos : POSITION;
    // Only the xy components are read by FullScreenQuadVS
    float3 UV : TEXCOORD0;
};

// Vertex-to-pixel interpolants of the post-processing pass.
struct PostProc_VSOut
{
    // Output position; its xy is also read in the pixel shader to index the random texture
    float4 pos   : SV_Position;
    // Input UV remapped by *(2,-2) + (-1,1) and divided by g_FocalLen
    float2 tex   : TEXCOORD0;
    // Input UV passed through unchanged; used for all texture fetches in the pixel shader
    float2 texUV : TEXCOORD1;
};

// Vertex shader for the full-screen quad. The position is passed through untouched and
// two texcoord sets are produced:
//   texUV - the incoming UV as-is
//   tex   - the incoming UV remapped by *(2,-2) + (-1,1), then divided by g_FocalLen
PostProc_VSOut FullScreenQuadVS(VS_INPUT IN)
{
    float2 quadUV = IN.UV.xy;
    float2 remapped = quadUV * float2( 2.0f, -2.0f ) + float2( -1.0f, 1.0f);

    PostProc_VSOut output = (PostProc_VSOut)0.0;
    output.pos   = IN.Pos;
    output.texUV = quadUV;
    output.tex   = remapped / g_FocalLen;
    return output;
}


//----------------------------------------------------------------------------------
// Tangent of the elevation angle from P towards S: the z difference (P.z - S.z)
// divided by the distance between the two points in the xy plane.
float tangent(float3 P, float3 S)
{
    float dz     = P.z - S.z;
    float distXY = length(S.xy - P.xy);
    return dz / distXY;
}

//----------------------------------------------------------------------------------
// Builds a position from a texture coordinate and a z value.
//   uv    - texture coordinate; remapped by *(2,-2) - (1,-1) before use
//   eye_z - z value, copied into the z component of the result
// Perspective mode (g_bPers): xy = remapped uv * g_InvFocalLen * eye_z.
// Orthographic mode:          xy = remapped uv * g_ViewFieldLen.
float3 uv_to_eye(float2 uv, float eye_z)
{
    float2 ndc = uv * float2(2.0, -2.0) - float2(1.0, -1.0);

    if (g_bPers)
    {
        return float3(ndc * g_InvFocalLen * eye_z, eye_z);
    }

    return float3(ndc * g_ViewFieldLen, eye_z);
}

//----------------------------------------------------------------------------------
// Reconstructs the position of the surface stored in g_NormalDepthMap at texture coordinate uv.
// The z value is read from the w channel (point-sampled, mip 0) and combined with uv by uv_to_eye.
float3 fetch_eye_pos(float2 uv)
{
    // Texture2D::SampleLevel takes a float2 location, so pass uv directly instead of
    // building a float3 that the compiler has to truncate implicitly.
    float z = g_NormalDepthMap.SampleLevel(samNearest, uv, 0).w;
    return uv_to_eye(uv, z);
}

//----------------------------------------------------------------------------------
// Same reconstruction as fetch_eye_pos, but the z value comes from g_BackDepthMap
// (point-sampled, mip 0) instead of the w channel of g_NormalDepthMap.
float3 fetch_eye_pos_back(float2 uv)
{
    // Texture2D::SampleLevel takes a float2 location, so pass uv directly instead of
    // building a float3 that the compiler has to truncate implicitly.
    float z = g_BackDepthMap.SampleLevel(samNearest, uv, 0);
    return uv_to_eye(uv, z);
}

//----------------------------------------------------------------------------------
// Fetches the position at uv and, when possible, slides it along its own view ray onto a plane.
//   uv           - texture coordinate of the neighbouring pixel
//   tangentPlane - plane as (N.xyz, d) with points X on the plane satisfying dot(N, X) = d
// When dot(N, V) is negative the fetched position V is scaled so that it lies on the plane;
// otherwise (silhouette edges, per the original note) V is returned unchanged.
float3 tangent_eye_pos(float2 uv, float4 tangentPlane)
{
    float3 viewPos = fetch_eye_pos(uv);
    float nDotV = dot(tangentPlane.xyz, viewPos);

    if (nDotV < 0.0)
    {
        float rayScale = tangentPlane.w / nDotV;
        viewPos *= rayScale;
    }

    return viewPos;
}

// Squared Euclidean length of v (avoids the square root of length()).
float length2(float3 v)
{
    return dot(v, v);
}

//----------------------------------------------------------------------------------
// Returns the shorter of the two one-sided differences around P:
// (Pr - P) if it is strictly shorter than (P - Pl), otherwise (P - Pl).
float3 min_diff(float3 P, float3 Pr, float3 Pl)
{
    float3 forwardDiff  = Pr - P;
    float3 backwardDiff = P - Pl;

    if (length2(forwardDiff) < length2(backwardDiff))
    {
        return forwardDiff;
    }
    return backwardDiff;
}

//----------------------------------------------------------------------------------
// Quadratic distance falloff: 1 - g_Attenuation * r^2.
//   r - distance passed by the caller (the caller normalizes it with g_inv_R)
// An exponential alternative, exp(-16*r*r), is kept here for reference only:
//   return exp(- 16*r*r);
float falloff(float r)
{
    float scaled = g_Attenuation * r;
    return 1.0f - scaled * r;
}

//----------------------------------------------------------------------------------
// Rounds a UV offset to a whole number of pixels: the offset is converted to pixels
// with g_Resolution, rounded, and converted back with g_InvResolution.
float2 snap_uv_offset(float2 uv)
{
    float2 wholePixels = round(uv * g_Resolution);
    return wholePixels * g_InvResolution;
}

// Moves a UV coordinate to the centre of the pixel that contains it by removing the
// sub-pixel remainder relative to the pixel centre. Assuming g_InvResolution is
// 1 / g_Resolution, this is algebraically (floor(uv * g_Resolution) + 0.5f) * g_InvResolution.
float2 snap_uv_coord(float2 uv)
{
    float2 offsetFromCentre = frac(uv * g_Resolution) - 0.5f;
    return uv - offsetFromCentre * g_InvResolution;
}

//----------------------------------------------------------------------------------
// Converts the tangent of an angle to the sine of the same angle:
// sin = tan / sqrt(1 + tan^2).
float tan_to_sin(float x)
{
    float hyp = sqrt(1.0f + x*x);
    return x / hyp;
}

//----------------------------------------------------------------------------------
// Linear combination of the two basis vectors dPdu and dPdv, weighted by
// the x and y components of deltaUV respectively.
float3 tangent_vector(float2 deltaUV, float3 dPdu, float3 dPdv)
{
    float3 alongU = deltaUV.x * dPdu;
    float3 alongV = deltaUV.y * dPdv;
    return alongU + alongV;
}

//----------------------------------------------------------------------------------
// Tangent of the elevation angle of vector T: negated z over the length of its xy part.
float tangent(float3 T)
{
    float lenXY = length(T.xy);
    return (-T.z) / lenXY;
}

//----------------------------------------------------------------------------------
// Tangent of T's elevation angle after adding g_AngleBias to the angle itself.
// The biased angle is clamped to at most pi/2 before it is converted back to a tangent.
float biased_tangent(float3 T)
{
    float halfPi = M_PI*0.5;
    float biasedAngle = atan(tangent(T)) + g_AngleBias;
    return tan(min(biasedAngle, halfPi));
}

//----------------------------------------------------------------------------------
// Marches from uv0 along one screen-space direction and accumulates horizon-based occlusion.
//   deltaUV  - UV step between consecutive samples
//   uv0      - texture coordinate of the pixel being shaded
//   P        - position of the pixel being shaded (as returned by fetch_eye_pos)
//   numSteps - maximum number of samples taken along the direction
//   randstep - jitter factor; the first sample is offset by randstep * deltaUV
//   dPdu     - tangent-plane basis vector used for the x component of UV offsets
//   dPdv     - tangent-plane basis vector used for the y component of UV offsets
// Returns the occlusion summed over all samples that raise the horizon.
float AccumulatedHorizonOcclusion(float2 deltaUV, 
                                  float2 uv0, 
                                  float3 P, 
                                  float numSteps, 
                                  float randstep,
                                  float3 dPdu,
                                  float3 dPdv )
{
    // First sample sits one full step away from uv0 plus a jittered fraction of a step.
    float2 marchUV = (uv0 + deltaUV) + randstep * deltaUV;

    // The horizon starts at the biased tangent of the surface along the
    // pixel-snapped offset of that first sample.
    float3 firstT = tangent_vector(snap_uv_offset(marchUV - uv0), dPdu, dPdv);
    float horizonTan = tangent(firstT) + g_TanAngleBias;

    float occlusion = 0;
    float prevH = 0;

    for (float i = 0; i < numSteps; ++i)
    {
        // Stop marching as soon as the sample leaves the [0,1] texture range.
        if (marchUV.x > 1.0f || marchUV.x < 0.0f || marchUV.y > 1.0f || marchUV.y < 0.0f)
            break;

        float2 texelUV = snap_uv_coord(marchUV);
        float3 S = fetch_eye_pos(texelUV);

        // Advance now; everything below only uses the snapped coordinate.
        marchUV += deltaUV;

        // Samples outside the radius of influence contribute nothing.
        float distSq = length2(S - P);
        if (distSq < g_sqr_R)
        {
            float sampleTan = tangent(P, S);

            [branch]
            if (sampleTan > horizonTan)
            {
                // Biased tangent of the surface along the offset to this snapped sample
                float3 surfaceT = tangent_vector(texelUV - uv0, dPdu, dPdv);
                float surfaceTan = tangent(surfaceT) + g_TanAngleBias;

                // Occlusion between the surface tangent and the sample, added incrementally
                // so only the part above the previous horizon is weighted by this sample's falloff
                float h = tan_to_sin(sampleTan) - tan_to_sin(surfaceTan);
                float r = sqrt(distSq) * g_inv_R;
                occlusion += falloff(r) * (h - prevH);
                prevH = h;

                // This sample becomes the new horizon
                horizonTan = sampleTan;
            }
        }
    }

    return occlusion;
}


//----------------------------------------------------------------------------------
// Pixel shader computing horizon-based ambient occlusion for one pixel of the full-screen quad.
//   IN.texUV - texture coordinate used for all depth/normal fetches
//   IN.pos   - pixel position, used to pick this pixel's entry in g_RandDir
// Returns 1 - (average occlusion over g_NumDir directions) * g_Contrast in every channel,
// or 1 (unoccluded) for the early-out cases below.
float4 HORIZON_BASED_AO_PS(PostProc_VSOut IN ) : SV_TARGET
{
    float3 P = fetch_eye_pos(IN.texUV);
    
    // Pixels whose stored z is (near) zero or negative are left unoccluded.
    if(P.z<1.0e-10f)
		return 1.0f;

    // With no directions to march the average below would be 0/0 (NaN);
    // report the pixel as unoccluded instead.
    if(g_NumDir <= 0.0f)
		return 1.0f;
    
    // Project the radius of influence g_R from eye space to texture space.
    // The scaling by 0.5 is to go from [-1,1] to [0,1].
    float2 step_size =(float2)0;
    if(g_bPers)
    {
		//perspective mode
		step_size=0.5 * g_R  * g_FocalLen / P.z;
    }else
    {
		//ortho mode
		step_size=0.5 * g_R  * g_InvViewFieldLen;
    }
  
    // Early out if the projected radius is smaller than 1 pixel.
    float numSteps = min( g_NumSteps, min(step_size.x * g_Resolution.x, step_size.y * g_Resolution.y));
  
    if( numSteps < 1.0 ) return 1.0;
    // Divide the projected radius into numSteps + 1 equal steps
    step_size = step_size / ( numSteps + 1 );

    // Nearest neighbor pixels on the tangent plane
    float3 Pr, Pl, Pt, Pb;
    float4 tangentPlane;
 
    // Texture2D::SampleLevel takes a float2 location, so pass the UV directly.
    float3 N = normalize(g_NormalDepthMap.SampleLevel(samNearest, IN.texUV, 0).xyz);
    tangentPlane = float4(N, dot(P, N));
    Pr = tangent_eye_pos(IN.texUV + float2(g_InvResolution.x, 0), tangentPlane);
    Pl = tangent_eye_pos(IN.texUV + float2(-g_InvResolution.x, 0), tangentPlane);
    Pt = tangent_eye_pos(IN.texUV + float2(0, g_InvResolution.y), tangentPlane);
    Pb = tangent_eye_pos(IN.texUV + float2(0, -g_InvResolution.y), tangentPlane);
    
    // Screen-aligned basis for the tangent plane
    float3 dPdu = min_diff(P, Pr, Pl);
    float3 dPdv = min_diff(P, Pt, Pb) * (g_Resolution.y * g_InvResolution.x);

    // (cos(alpha),sin(alpha),jitter), looked up by pixel position wrapped to 0..63 on each axis
    float3 rand_Dir = g_RandDir.Load(int3((int)IN.pos.x&63, (int)IN.pos.y&63, 0)).xyz;

    float ao = 0;
    float d;
    // Accumulate occlusion over g_NumDir directions. Each direction from g_Dirs is
    // rotated by the per-pixel (cos, sin) pair and scaled to the per-step UV size.
     for (d = 0; d < g_NumDir; ++d){
			float2 dir=g_Dirs.Load(int2(d,0)).xy;
            float2 deltaUV = float2(dir.x*rand_Dir.x - dir.y*rand_Dir.y, 
                                dir.x*rand_Dir.y + dir.y*rand_Dir.x) 
                                * step_size.xy;
             ao += AccumulatedHorizonOcclusion(deltaUV, IN.texUV, P, numSteps, rand_Dir.z, dPdu, dPdv);
             }
     
   

    return 1.0 - ao / g_NumDir * g_Contrast;
}

//----------------------------------------------------------------------------------
// Single-pass technique: full-screen quad vertex shader plus the HBAO pixel shader,
// both compiled for shader model 4.0; no geometry shader is bound.
technique10 HORIZON_BASED_AO_NLD_Pass
{
    pass p0
    {
        SetVertexShader( CompileShader( vs_4_0, FullScreenQuadVS() ) );
        SetGeometryShader( NULL );
        SetPixelShader( CompileShader( ps_4_0, HORIZON_BASED_AO_PS( ) ) );
    }
}
